#!/usr/bin/env python
# encoding: utf-8
"""
@summary: Count suv occurrences in WAP-side logs; the input file path is given as the command-line argument.
@author: hongxingfan
@since: 2014-08-19 17:32:07
"""
import gzip
import os
import sys
from collections import Counter

    

# Delimiter between adjacent quoted fields of a log line, i.e. the text `","`.
FIELD_SEPARATOR = "\",\""
# Zero-based index of the suv field after splitting a line on FIELD_SEPARATOR.
SUV_COLUMN = 11
# A progress message is written each time this many records have been counted.
PROGRESS_INTERVAL = 300000


def count_suv(path, column=SUV_COLUMN, progress_every=PROGRESS_INTERVAL):
    """Count how often each suv value occurs in a gzip-compressed log file.

    Every line is split on ``","`` and the field at index ``column`` is taken
    as the suv key. Lines with too few fields are reported on stderr and
    skipped; they are not included in the progress count.

    Args:
        path: Path of the gzip-compressed input file.
        column: Zero-based index of the suv field within a split line.
        progress_every: A progress message is written to stderr whenever the
            number of records counted so far is a multiple of this value.
            Pass 0 to disable progress messages.

    Returns:
        A ``Counter`` mapping each suv (str) to its number of occurrences.
    """
    counts = Counter()
    processed = 0
    # Read bytes and decode explicitly: on Python 3 a gzip file opened with
    # mode 'r' yields bytes, and splitting bytes on a str separator raises
    # TypeError.
    with gzip.open(path, "rb") as pf:
        for raw_line in pf:
            # "backslashreplace" never raises on malformed UTF-8 and keeps
            # different invalid byte sequences distinguishable as keys.
            line = raw_line.decode("utf-8", "backslashreplace")
            cols = line.split(FIELD_SEPARATOR)
            if len(cols) <= column:
                # Diagnostics go to stderr so stdout carries only results.
                sys.stderr.write("index error: %s\n" % (cols,))
                continue
            counts[cols[column]] += 1

            # The check happens before the increment, so the first message
            # reports 0 (same timing as the original script).
            if progress_every and processed % progress_every == 0:
                sys.stderr.write("%d 条记录被处理\n" % processed)
            processed += 1
    return counts


def main(argv=None):
    """Command-line entry point: print one ``suv<TAB>count`` line per suv.

    Args:
        argv: Argument list laid out like ``sys.argv`` (program name first,
            input path second). Defaults to ``sys.argv``.

    Returns:
        The process exit status: 0 on success, 1 when the path does not
        exist, 2 when no path argument was supplied.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        prog = argv[0] if argv else "suv_count"
        sys.stderr.write("usage: %s <path-to-gzip-file>\n" % prog)
        return 2

    path = argv[1]
    if not os.path.exists(path):
        # A non-zero status lets calling scripts detect the failure.
        sys.stderr.write('the path [{}] is not exist!\n'.format(path))
        return 1

    for suv, count in count_suv(path).items():
        print("%s\t%d" % (suv, count))
    return 0


if __name__ == "__main__":
    sys.exit(main())
